---
title: "figure-3-and-12"
format: html
editor: visual
---

## Making Figure 12 (Won et al.)

```{r}
# Package setup for both figures.
# devtools is attached first because install_github() below comes from it.
library(devtools)
# NOTE(review): the two install calls below run every time this chunk is
# executed, so each render reinstalls the packages (force = TRUE) and needs
# network access. Consider running them once by hand instead.
install.packages("lme4", type = "source")
install_github("naoki-egami/dsl", ref = "update-Aug2024", dependencies = TRUE, force = TRUE)
library(dsl)
library(janitor)
library(tidyverse)
library(broom)
library(jtools)
# NOTE(review): tidyverse is attached a second time here (harmless duplicate).
library(tidyverse)
library(ggplot2)
library(broom.mixed)
library(estimatr)
```

```{r}
# Load the classifier output and the human annotations for the same images.
machine_df <- read.csv(file = "~/Downloads/DSL images/WonetAl_result.csv")
human_df <- read.table(
  file = "~/Downloads/DSL images/annot_test.txt",
  sep = "\t", quote = "", comment.char = ""
)
# row_to_names() promotes the first row to column names (presumably read.table
# kept the header line as data -- TODO confirm).
# NOTE(review): if so, every annotation column was read together with the
# header text and is likely character rather than numeric; confirm the models
# below treat these columns as intended.
human_df <- human_df %>% row_to_names(row_number = 1)

# The two files are combined by row position, not by a key, so they must hold
# the same images in the same order. Stop early if the row counts disagree.
stopifnot(nrow(machine_df) == nrow(human_df))

machine_df["protest_h"] <- human_df["protest"]

# Keep only the images that the human annotation marks as protests.
protest_human <- subset(human_df, protest == 1)
protest_machine <- subset(machine_df, protest_h == 1)

# Copy the human-coded visual attributes into the machine data so both models
# share the same predictors and differ only in the violence outcome.
attribute_cols <- c(
  "sign", "photo", "fire", "police", "children",
  "group_20", "flag", "night", "shouting"
)
protest_machine[attribute_cols] <- protest_human[attribute_cols]

# One formula for both fits so the two specifications cannot drift apart.
violence_formula <- violence ~ sign + photo + fire + police + children +
  group_20 + flag + night + shouting

# outcome var completely human generated
all_human_model <- lm(formula = violence_formula, data = protest_human)

# outcome var completely machine generated
all_machine_model <- lm(formula = violence_formula, data = protest_machine)

# SAMPLING: working copy for DSL, with an explicit row index used for masking.
protest_dsl <- protest_machine
protest_dsl$row <- seq_len(nrow(protest_dsl))
```

```{r}
# Fix the RNG so the same rows are masked on every run.
set.seed(08544)

# Number of rows whose human violence score is hidden before fitting DSL; the
# remaining rows keep their human label.
n_unlabeled <- 1743

# Named masked_rows rather than `sample` so base::sample() is not shadowed.
masked_rows <- sample(protest_dsl$row, size = n_unlabeled)

# Start from the human violence scores, then blank out the masked rows.
protest_dsl$violence_truth <- protest_human$violence
protest_dsl$violence_truth[protest_dsl$row %in% masked_rows] <- NA

# DSL fit: violence_truth is the partially observed outcome and the `violence`
# column (carried over from the machine data) is passed as its prediction.
dsl_model <- dsl(
  model = "lm",
  formula = violence_truth ~ sign + photo + fire + police + children +
    group_20 + flag + night + shouting,
  predicted_var = "violence_truth",
  prediction = "violence",
  data = protest_dsl,
  tuning = TRUE
)
```

```{r}
# Coefficient tables (with confidence intervals) for the two lm fits.
m_tidy <- all_machine_model %>% tidy(conf.int = TRUE)
h_tidy <- all_human_model %>% tidy(conf.int = TRUE)

# Print the DSL summary, then reshape it into the column names broom uses.
summary(dsl_model)
dsl_tidy <- as.data.frame(summary(dsl_model))
dsl_tidy["term"] <- row.names(dsl_tidy)
row.names(dsl_tidy) <- seq_len(nrow(dsl_tidy))
dsl_tidy["estimate"] <- dsl_tidy["Estimate"]
dsl_tidy["std.error"] <- dsl_tidy$`Std. Error`
dsl_tidy["p.value"] <- dsl_tidy$`p value`
dsl_tidy["conf.low"] <- dsl_tidy$`CI Lower`
dsl_tidy["conf.high"] <- dsl_tidy$`CI Upper`
# Keep only the columns that also exist in the broom output.
dsl_tidy <- dsl_tidy[, colnames(dsl_tidy) %in% colnames(h_tidy)]

# Display names for the coefficients, in formula order. The fitted term names
# are overwritten by position, so every table must have exactly this many rows.
term_labels <- c(
  "(Intercept)", "sign", "photo", "fire", "police",
  "children", "group>20", "flag", "night", "shouting"
)
stopifnot(
  nrow(dsl_tidy) == length(term_labels),
  nrow(m_tidy) == length(term_labels),
  nrow(h_tidy) == length(term_labels)
)
dsl_tidy["term"] <- term_labels
m_tidy["term"] <- term_labels
h_tidy["term"] <- term_labels

# Stack the three tables and tag each row with the model it came from.
tidy_cols <- c("term", "estimate", "std.error", "p.value", "conf.low", "conf.high")
machine <- as.data.frame(m_tidy)[, tidy_cols]
human <- as.data.frame(h_tidy)[, tidy_cols]
alltogether <- rbind(machine, human, dsl_tidy)
# Labels follow the rbind order above; lengths are taken from the tables
# instead of being written out by hand.
alltogether$model <- rep(
  c("ResNet", "Benchmark", "DSL"),
  times = c(nrow(machine), nrow(human), nrow(dsl_tidy))
)

# One facet per predictor, one row per model, horizontal confidence intervals.
alltogether %>%
  filter(term != "(Intercept)") %>%
  ggplot(aes(estimate, model, colour = model, shape = model)) +
  facet_wrap(~ term) +
  scale_shape_manual(values = 0:9) +
  geom_point(show.legend = FALSE) +
  geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), show.legend = FALSE) +
  # add in a dotted line at zero
  geom_vline(xintercept = 0, lty = 2) +
  coord_cartesian(xlim = c(-0.1, 0.35)) +
  labs(
    x = "Estimate of effect of variable on perceived level of violence",
    y = NULL,
    title = "Estimates and confidence intervals for predictors of perceived level of violence (applying DSL once with 600 annotations)"
  ) +
  # theme_bw() is a complete theme and replaces earlier theme() settings, so it
  # has to come before the bold-title tweak for that tweak to take effect.
  theme_bw() +
  theme(plot.title = element_text(face = "bold")) +
  scale_color_grey()



```

```{r}
# Factor copy of the model label with an explicit level order
# (Benchmark, ResNet, DSL); the plots below map colour and shape to it.
alltogether[["Model"]] <- factor(
  alltogether[["model"]],
  levels = c("Benchmark", "ResNet", "DSL")
)

# Draft plot restricted to three predictors, models dodged side by side.
fig12_terms <- c("photo", "group>20", "night")
fig12_dodge <- position_dodge(width = 0.5)

ggplot(
  data = filter(alltogether, term %in% fig12_terms),
  mapping = aes(x = term, y = estimate, color = Model, group = Model, shape = Model)
) +
  # Point estimates
  geom_point(position = fig12_dodge, size = 3) +
  # Confidence intervals
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0.2,
    position = fig12_dodge
  ) +
  labs(
    x = "Independent variables",
    y = "Estimates",
    title = "Estimates and confidence intervals for predictors of perceived level of violence"
  ) +
  theme_bw() +
  # Enlarge the text elements (no rotation is applied to the axis labels).
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 14)
  ) +
  scale_color_manual(
    values = c("DSL" = "tomato2", "ResNet" = "deepskyblue", "Benchmark" = "black")
  ) +
  scale_shape_manual(
    values = c("DSL" = 16, "ResNet" = 18, "Benchmark" = 15)
  )


```

```{r}
# All predictors (intercept excluded) on one axis, models dodged side by side.
alltogether %>%
  filter(term != "(Intercept)") %>%
  ggplot(aes(x = term, y = estimate, color = model, group = model, shape = model)) +
  # Point estimates
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  # Confidence intervals
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0.2,
    position = position_dodge(width = 0.5)
  ) +
  labs(
    x = "Term",
    y = "Estimate and confidence interval",
    title = "Estimates and confidence intervals for predictors of perceived level of violence"
  ) +
  # theme_bw() is a complete theme: added after theme() it would discard every
  # customisation below, so it is applied first and the tweaks layered on top.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 14), # rotate and enlarge x labels
    axis.text.y = element_text(size = 16),
    axis.title.x = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    plot.title = element_text(size = 18, face = "bold"),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 16),
    panel.spacing = unit(1, "lines")
  ) +
  scale_color_manual(values = c("DSL" = "tomato2", "ResNet" = "deepskyblue", "Benchmark" = "black")) +
  scale_shape_manual(values = c("DSL" = 16, "ResNet" = 18, "Benchmark" = 15))

```

## Make final Figure 12

```{r}
# Final Figure 12: three predictors, with the legend replaced by text labels
# drawn inside the panel. Relies on the `Model` factor column of alltogether.
fig12_final_dodge <- position_dodge(width = 0.5)

fig12_base <- ggplot(
  data = filter(alltogether, term %in% c("photo", "group>20", "night")),
  mapping = aes(x = term, y = estimate, color = Model, group = Model, shape = Model)
) +
  geom_point(position = fig12_final_dodge, size = 3) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0.2,
    position = fig12_final_dodge
  ) +
  labs(
    x = "Independent variables",
    y = "Estimates",
    title = "Estimates and confidence intervals for predictors of perceived level of violence"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title = element_text(size = 14),
    plot.title = element_text(size = 16),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 14)
  ) +
  scale_color_manual(
    values = c("DSL" = "tomato2", "ResNet" = "deepskyblue", "Benchmark" = "black")
  ) +
  scale_shape_manual(
    values = c("DSL" = 16, "ResNet" = 18, "Benchmark" = 15)
  )

# In-panel model labels anchored at the "night" term; the y and hjust values
# are hand-placed offsets. The legend itself is switched off last.
fig12_base +
  annotate(
    "text", x = "night", y = 0.05, label = "DSL",
    color = "tomato2", fontface = "bold", size = 5, hjust = -1.7
  ) +
  annotate(
    "text", x = "night", y = 0.1, label = "ResNet",
    color = "deepskyblue", fontface = "bold", size = 5, hjust = -0.2
  ) +
  annotate(
    "text", x = "night", y = 0.042, label = "Benchmark",
    color = "black", fontface = "bold", size = 5, hjust = 1.7
  ) +
  theme(legend.position = "none")
```

## Making Figure 3 (Casas)

```{r}
# Load the tweet-level data and the GPT-4o image annotations.
itm <- read_csv("~/Downloads/DSL images/itm-or-tweets-w-img-fname.csv")
gpt4o <- read_csv("~/Downloads/DSL images/gpt4o.csv")

# Restrict both tables to the image file names they have in common.
# (Named shared_imgs rather than `list` so base::list() is not shadowed.)
shared_imgs <- intersect(unique(itm$img_fname), unique(gpt4o$name))
human_df <- itm[itm$img_fname %in% shared_imgs, ]
gpt4o <- gpt4o[gpt4o$name %in% shared_imgs, ]

# NOTE(review): neither table is de-duplicated on the file name, so an image
# that appears more than once on either side produces several merged rows.
# Confirm that this is intended.
merged <- merge(human_df, gpt4o, by.x = "img_fname", by.y = "name")

# Columns present in both inputs get merge()'s default suffixes: ".x" for the
# first table (human_df) and ".y" for the second (gpt4o).
control_cols <- c(
  "retweet_n", "followers_count", "friends_count", "prev_tweets",
  "time_control", "protest", "symbol"
)
emotion_cols <- c("anger", "fear", "disgust", "sadness", "enthusiasm")
human_emotions <- paste0(emotion_cols, ".x")
gpt_emotions <- paste0(emotion_cols, ".y")

# make emotion variables between 0 and 1
# (divides by 10; assumes the raw scores are on a 0-10 scale -- TODO confirm)
merged[, human_emotions] <- merged[, human_emotions] / 10
merged[, gpt_emotions] <- merged[, gpt_emotions] / 10

# Same controls in both data sets; emotions from GPT-4o ...
gpt_df <- merged[, c(control_cols, gpt_emotions)]
colnames(gpt_df) <- c(control_cols, emotion_cols)

# ... versus emotions from the human-side table.
only_df <- merged[, c(control_cols, human_emotions)]
colnames(only_df) <- c(control_cols, emotion_cols)



```

```{r}
# One shared specification for both fits, so the two models can only differ in
# the data they are estimated on.
retweet_formula <- log(retweet_n + 1) ~ followers_count +
  friends_count +
  prev_tweets + time_control +
  protest +
  symbol +
  anger +
  fear +
  disgust +
  sadness +
  enthusiasm

# Emotion columns taken from the GPT-4o annotations.
gpt4_model <- lm(retweet_formula, data = gpt_df)

# Emotion columns taken from the human-side table.
only_human_model <- lm(retweet_formula, data = only_df)

# Quick side-by-side coefficient plot of the two fits for the displayed terms.
plot_summs(
  gpt4_model, only_human_model,
  model.names = c("GPT-4o predictions", "Human annotations"),
  plot.distributions = TRUE,
  rescale.distributions = TRUE,
  coefs = c("protest", "symbol", "anger", "fear", "disgust", "sadness", "enthusiasm"),
  legend.title = "Emotion annotations"
)

```

```{r}
# Coefficient tables (with confidence intervals) for the two retweet models.
m_tidy <- gpt4_model %>% tidy(conf.int = TRUE)
h_tidy <- only_human_model %>% tidy(conf.int = TRUE)

tidy_cols <- c("term", "estimate", "std.error", "p.value", "conf.low", "conf.high")
machine <- as.data.frame(m_tidy)[, tidy_cols]
human <- as.data.frame(h_tidy)[, tidy_cols]

# Stack the GPT-4o rows first, then the human benchmark rows.
alltogether <- rbind(machine, human)
# Label each row with its source model. Lengths are taken from the tables so
# the labels stay aligned whatever number of coefficients the models have.
alltogether$Model <- rep(
  c("GPT-4o", "Benchmark"),
  times = c(nrow(machine), nrow(human))
)

```

```{r}
# Keep only the image-content and emotion coefficients; the control variables
# and the intercept are dropped from the plotting table.
fig3_terms <- c("protest", "symbol", "anger", "fear", "disgust", "sadness", "enthusiasm")
alltogether <- alltogether[alltogether$term %in% fig3_terms, ]
```

```{r}
# Draft of Figure 3 (legend shown): estimates and confidence intervals per
# term, with the two models dodged side by side.
fig3_dodge <- position_dodge(width = 0.5)

ggplot(
  data = filter(alltogether, term != "(Intercept)"),
  mapping = aes(x = term, y = estimate, color = Model, group = Model, shape = Model)
) +
  # Point estimates
  geom_point(position = fig3_dodge, size = 3) +
  # Confidence intervals
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0.2,
    position = fig3_dodge
  ) +
  labs(
    x = "Independent variables",
    y = "Estimates",
    title = "Estimates and confidence intervals for predictors of retweets"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    plot.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    panel.spacing = unit(1, "lines")
  ) +
  scale_color_manual(
    values = c("DSL" = "tomato2", "GPT-4o" = "deepskyblue", "Benchmark" = "black")
  ) +
  scale_shape_manual(
    values = c("DSL" = 16, "GPT-4o" = 18, "Benchmark" = 15)
  )

```

## Make final Figure 3

```{r}
# Final Figure 3: same layers as the draft, with the legend replaced by text
# labels drawn inside the panel.
fig3_final_dodge <- position_dodge(width = 0.5)

fig3_base <- ggplot(
  data = filter(alltogether, term != "(Intercept)"),
  mapping = aes(x = term, y = estimate, color = Model, group = Model, shape = Model)
) +
  geom_point(position = fig3_final_dodge, size = 3) +
  geom_errorbar(
    aes(ymin = conf.low, ymax = conf.high),
    width = 0.2,
    position = fig3_final_dodge
  ) +
  labs(
    x = "Independent variables",
    y = "Estimates",
    title = "Estimates and confidence intervals for predictors of retweets"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 14),
    axis.text.y = element_text(size = 14),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    plot.title = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14),
    panel.spacing = unit(1, "lines")
  ) +
  scale_color_manual(
    values = c("DSL" = "tomato2", "GPT-4o" = "deepskyblue", "Benchmark" = "black")
  ) +
  scale_shape_manual(
    values = c("DSL" = 16, "GPT-4o" = 18, "Benchmark" = 15)
  )

# In-panel model labels anchored at the "enthusiasm" term; the y and hjust
# values are hand-placed offsets. The legend itself is switched off last.
fig3_base +
  annotate(
    "text", x = "enthusiasm", y = 0.27, label = "GPT-4o",
    color = "deepskyblue", fontface = "bold", size = 5, hjust = 0.2
  ) +
  annotate(
    "text", x = "enthusiasm", y = -0.02, label = "Benchmark",
    color = "black", fontface = "bold", size = 5, hjust = 0.5
  ) +
  theme(legend.position = "none")
```
